# Histopathology analysis with accessible plots:

> # - One color per treatment (color-blind friendly)

> # - Same symbol for all raw points

> # - Mean ± SD drawn in black (distinct from points)

> # - Dunn (Bonferroni) vs Control annotations

> # =========================================================

>

> # ---- Packages ----

> pkgs <- c("tidyverse","rstatix","car","patchwork","effsize","effectsize","viridis","ggpubr")

> to_install <- pkgs[!pkgs %in% installed.packages()[,"Package"]]

> if(length(to_install)) install.packages(to_install, dep=TRUE)

>

> library(tidyverse)

── Attaching core tidyverse packages ──────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──

✔ dplyr 1.1.4 ✔ readr 2.1.5

✔ forcats 1.0.1 ✔ stringr 1.5.2

✔ ggplot2 4.0.0 ✔ tibble 3.3.0

✔ lubridate 1.9.4 ✔ tidyr 1.3.1

✔ purrr 1.1.0

── Conflicts ────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──

✖ lubridate::%within%() masks IRanges::%within%()

✖ dplyr::collapse() masks Biostrings::collapse(), IRanges::collapse()

✖ dplyr::combine() masks BiocGenerics::combine()

✖ purrr::compact() masks XVector::compact()

✖ dplyr::desc() masks IRanges::desc()

✖ tidyr::expand() masks S4Vectors::expand()

✖ dplyr::filter() masks stats::filter()

✖ dplyr::first() masks S4Vectors::first()

✖ dplyr::lag() masks stats::lag()

✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()

✖ purrr::reduce() masks IRanges::reduce()

✖ dplyr::rename() masks S4Vectors::rename()

✖ lubridate::second() masks S4Vectors::second()

✖ lubridate::second<-() masks S4Vectors::second<-()

✖ dplyr::slice() masks XVector::slice(), IRanges::slice()

ℹ Use the conflicted package to force all conflicts to become errors

Warning messages:

1: package ‘ggplot2’ was built under R version 4.4.1

2: package ‘tibble’ was built under R version 4.4.1

3: package ‘purrr’ was built under R version 4.4.1

4: package ‘stringr’ was built under R version 4.4.1

5: package ‘forcats’ was built under R version 4.4.1

6: package ‘lubridate’ was built under R version 4.4.1

> library(rstatix) # kruskal_test, dunn_test, kruskal_effsize


Attaching package: ‘rstatix’


The following object is masked from ‘package:IRanges’:


desc


The following object is masked from ‘package:stats’:


filter


> library(car) # leveneTest

Loading required package: carData


Attaching package: ‘car’


The following object is masked from ‘package:dplyr’:


recode


The following object is masked from ‘package:purrr’:


some


Warning message:

package ‘car’ was built under R version 4.4.1

> library(patchwork) # plot panels

Warning message:

package ‘patchwork’ was built under R version 4.4.1

> library(effsize) # cliff.delta

> library(effectsize) # eta_squared (optional)


Attaching package: ‘effectsize’


The following objects are masked from ‘package:rstatix’:


cohens_d, eta_squared


Warning message:

package ‘effectsize’ was built under R version 4.4.1

> library(viridis) # color-blind friendly palettes

Loading required package: viridisLite

> library(ggpubr) # stat_pvalue_manual

Warning message:

package ‘ggpubr’ was built under R version 4.4.1

>

> set.seed(123)

>

> # ---- Data ----

> df <- tribble(

+ ~Treatment, ~CaDig, ~Tubule,

+ "Control", 0.18, 41.52,

+ "Control", 0.14, 78.65,

+ "Control", 0.20, 37.60,

+ "Control", 0.20, 52.21,

+ "Control", 0.21, 44.21,

+ "Control", 0.18, 46.34,

+

+ "T1", 0.21, 47.32,

+ "T1", 0.16, 50.25,

+ "T1", 0.21, 62.20,

+ "T1", 0.18, 40.26,

+ "T1", 0.20, 51.63,

+ "T1", 0.22, 42.39,

+

+ "T2", 0.35, 49.90,

+ "T2", 0.29, 63.28,

+ "T2", 0.30, 59.46,

+ "T2", 0.38, 70.84,

+ "T2", 0.40, 51.92,

+ "T2", 0.41, 70.20,

+

+ "T3", 0.50, 65.53,

+ "T3", 0.55, 113.39,

+ "T3", 0.55, 72.45,

+ "T3", 0.50, 75.28,

+ "T3", 0.56, 80.70,

+ "T3", 0.48, 70.56,

+

+ "T4", 0.71, 65.31,

+ "T4", 0.46, 97.86,

+ "T4", 0.60, 95.09,

+ "T4", 0.57, 65.93,

+ "T4", 0.69, 81.72,

+ "T4", 0.54, 95.70,

+

+ "T5", 0.46, 85.30,

+ "T5", 0.60, 62.94,

+ "T5", 0.875, 91.76,

+ "T5", 1.125, 134.12,

+ "T5", 0.50, 115.08,

+ "T5", 0.78, 124.90

+ ) %>%

+ mutate(Treatment = factor(Treatment, levels = c("Control","T1","T2","T3","T4","T5")))

>

> # ---- Assumption checks ----

> norm_tbl <- df %>%

+ pivot_longer(c(CaDig, Tubule), names_to="Metric", values_to="Value") %>%

+ group_by(Metric, Treatment) %>%

+ summarise(n = n(),

+ W = shapiro.test(Value)$statistic,

+ p_shapiro = shapiro.test(Value)$p.value,

+ .groups = "drop")

> print(norm_tbl, n=24)

# A tibble: 12 × 5

Metric Treatment n W p_shapiro

<chr> <fct> <int> <dbl> <dbl>

1 CaDig Control 6 0.871 0.231

2 CaDig T1 6 0.905 0.404

3 CaDig T2 6 0.899 0.366

4 CaDig T3 6 0.852 0.163

5 CaDig T4 6 0.956 0.792

6 CaDig T5 6 0.935 0.620

7 Tubule Control 6 0.797 0.0551

8 Tubule T1 6 0.941 0.668

9 Tubule T2 6 0.908 0.425

10 Tubule T3 6 0.769 0.0301

11 Tubule T4 6 0.820 0.0880

12 Tubule T5 6 0.955 0.784

>

> lev_cadig <- car::leveneTest(CaDig ~ Treatment, data = df, center = median)

> lev_tub <- car::leveneTest(Tubule ~ Treatment, data = df, center = median)

> flg_cadig <- fligner.test(CaDig ~ Treatment, data = df)

> flg_tub <- fligner.test(Tubule ~ Treatment, data = df)

> print(lev_cadig); print(lev_tub); print(flg_cadig); print(flg_tub)

Levene's Test for Homogeneity of Variance (center = median)

Df F value Pr(>F)

group 5 9.1112 2.356e-05 ***

30

---

Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Levene's Test for Homogeneity of Variance (center = median)

Df F value Pr(>F)

group 5 2.1548 0.08593 .

30

---

Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1


Fligner-Killeen test of homogeneity of variances


data: CaDig by Treatment

Fligner-Killeen:med chi-squared = 22.666, df = 5, p-value = 0.000391



Fligner-Killeen test of homogeneity of variances


data: Tubule by Treatment

Fligner-Killeen:med chi-squared = 10.979, df = 5, p-value = 0.05179


>

> # ---- Descriptive stats ----

> summary_tbl <- df %>%

+ pivot_longer(c(CaDig,Tubule), names_to="Metric", values_to="Value") %>%

+ group_by(Metric, Treatment) %>%

+ summarise(n = n(), mean = mean(Value), sd = sd(Value),

+ median = median(Value), IQR = IQR(Value), .groups="drop")

> print(summary_tbl, n=12)

# A tibble: 12 × 7

Metric Treatment n mean sd median IQR

<chr> <fct> <int> <dbl> <dbl> <dbl> <dbl>

1 CaDig Control 6 0.185 0.0251 0.19 0.0200

2 CaDig T1 6 0.197 0.0225 0.205 0.025

3 CaDig T2 6 0.355 0.0509 0.365 0.0825

4 CaDig T3 6 0.523 0.0339 0.525 0.0500

5 CaDig T4 6 0.595 0.0940 0.585 0.12

6 CaDig T5 6 0.723 0.254 0.69 0.326

7 Tubule Control 6 50.1 14.8 45.3 8.55

8 Tubule T1 6 49.0 7.82 48.8 7.66

9 Tubule T2 6 60.9 8.88 61.4 14.7

10 Tubule T3 6 79.7 17.3 73.9 8.31

11 Tubule T4 6 83.6 15.0 88.4 25.7

12 Tubule T5 6 102. 27.0 103. 35.5

>

> # ---- Kruskal–Wallis + global effect size (eta2[H]) ----

> kw_cadig <- rstatix::kruskal_test(df, CaDig ~ Treatment)

> kw_tub <- rstatix::kruskal_test(df, Tubule ~ Treatment)

> print(kw_cadig); print(kw_tub)

# A tibble: 1 × 6

.y. n statistic df p method

* <chr> <int> <dbl> <int> <dbl> <chr>

1 CaDig 36 30.1 5 0.0000142 Kruskal-Wallis

# A tibble: 1 × 6

.y. n statistic df p method

* <chr> <int> <dbl> <int> <dbl> <chr>

1 Tubule 36 23.5 5 0.000265 Kruskal-Wallis

>

> eff_kw_cadig <- rstatix::kruskal_effsize(df, CaDig ~ Treatment) # eta2[H]

> eff_kw_tub <- rstatix::kruskal_effsize(df, Tubule ~ Treatment)

> print(eff_kw_cadig); print(eff_kw_tub)

# A tibble: 1 × 5

.y. n effsize method magnitude

* <chr> <int> <dbl> <chr> <ord>

1 CaDig 36 0.836 eta2[H] large

# A tibble: 1 × 5

.y. n effsize method magnitude

* <chr> <int> <dbl> <chr> <ord>

1 Tubule 36 0.618 eta2[H] large

>

> # ---- Dunn (Bonferroni), then filter vs Control ----

> dunn_cadig_all <- df %>% rstatix::dunn_test(CaDig ~ Treatment, p.adjust.method = "bonferroni")

> dunn_tub_all <- df %>% rstatix::dunn_test(Tubule ~ Treatment, p.adjust.method = "bonferroni")

>

> keep_vs_control <- function(tbl, control="Control"){

+ tbl %>%

+ dplyr::filter(group1 == control | group2 == control) %>%

+ dplyr::mutate(

+ grp2 = ifelse(group1 == control, group2, group1),

+ group1 = control,

+ group2 = grp2

+ ) %>%

+ dplyr::select(-grp2) %>%

+ dplyr::arrange(group2)

+ }

> dunn_cadig <- keep_vs_control(dunn_cadig_all, "Control")

> dunn_tub <- keep_vs_control(dunn_tub_all, "Control")

>

> p_stars <- function(p) dplyr::case_when(

+ p <= 0.001 ~ "***",

+ p <= 0.01 ~ "**",

+ p <= 0.05 ~ "*",

+ TRUE ~ "ns"

+ )

> dunn_cadig <- dunn_cadig %>% dplyr::mutate(label = p_stars(p.adj))

> dunn_tub <- dunn_tub %>% dplyr::mutate(label = p_stars(p.adj))

>

> # ---- Pairwise effect size: Cliff's delta vs Control ----

> levels_trt <- levels(df$Treatment)

> ref <- "Control"

> cd_cadig <- purrr::map_dfr(levels_trt[-1], ~{

+ x <- df$CaDig[df$Treatment==.x]; y <- df$CaDig[df$Treatment==ref]

+ d <- effsize::cliff.delta(x, y)

+ tibble(Metric="CaDig", group1=ref, group2=.x, cliffs_delta=d$estimate, magnitude=d$magnitude)

+ })

Warning messages:

1: In cliff.delta.default(x, y) :

The samples are fully disjoint, using approximate Confidence Interval estimation

2: In cliff.delta.default(x, y) :

The samples are fully disjoint, using approximate Confidence Interval estimation

3: In cliff.delta.default(x, y) :

The samples are fully disjoint, using approximate Confidence Interval estimation

4: In cliff.delta.default(x, y) :

The samples are fully disjoint, using approximate Confidence Interval estimation

> cd_tub <- purrr::map_dfr(levels_trt[-1], ~{

+ x <- df$Tubule[df$Treatment==.x]; y <- df$Tubule[df$Treatment==ref]

+ d <- effsize::cliff.delta(x, y)

+ tibble(Metric="Tubule", group1=ref, group2=.x, cliffs_delta=d$estimate, magnitude=d$magnitude)

+ })

>

> # ---- Build significance annotations (only p < 0.05) ----

> max_mean_sd <- function(v) {

+ df %>% group_by(Treatment) %>% summarise(m=mean(.data[[v]]), s=sd(.data[[v]]), .groups="drop") %>%

+ summarise(max(m+s)) %>% pull()

+ }

> max_cadig <- max_mean_sd("CaDig")

> max_tub <- max_mean_sd("Tubule")

>

> sig_cadig <- dunn_cadig %>% filter(p.adj < 0.05) %>%

+ mutate(y.position = seq(from = max_cadig*1.08, by = max_cadig*0.06, length.out = n()))

> sig_tub <- dunn_tub %>% filter(p.adj < 0.05) %>%

+ mutate(y.position = seq(from = max_tub*1.08, by = max_tub*0.06, length.out = n()))

>

> # ---- Plotting (one color per treatment, same shape for all points; mean±SD in black) ----

> base_theme <- theme_minimal(base_size = 12) +

+ theme(panel.grid.major.x = element_blank(),

+ axis.text.x = element_text(size = 11),

+ legend.position = "right")

>

> make_plot <- function(data, yvar, ylab, sig_df=NULL){

+ # Summary for mean ± SD

+ sumdf <- data %>%

+ group_by(Treatment) %>%

+ summarise(mean = mean(.data[[yvar]]),

+ sd = sd(.data[[yvar]]),

+ .groups="drop")

+

+ p <- ggplot(data, aes(x = Treatment, y = .data[[yvar]])) +

+ # Raw points: one color per treatment, same symbol (circles)

+ geom_jitter(aes(color = Treatment),

+ width = 0.12, height = 0, alpha = 0.85, size = 2.6) +

+ # Mean ± SD: draw in black (distinct from raw points)

+ geom_errorbar(data = sumdf,

+ aes(x = Treatment, y = mean, ymin = mean - sd, ymax = mean + sd),

+ width = 0.22, size = 0.7, color = "black", inherit.aes = FALSE) +

+ geom_point(data = sumdf, aes(x = Treatment, y = mean),

+ size = 3.2, color = "black", inherit.aes = FALSE) +

+ scale_color_viridis_d(option = "cividis", begin = 0.05, end = 0.95, name = "Treatment") +

+ labs(x = NULL, y = ylab) +

+ base_theme

+

+ if(!is.null(sig_df) && nrow(sig_df) > 0){

+ p <- p + ggpubr::stat_pvalue_manual(

+ sig_df,

+ label = "label",

+ y.position = "y.position",

+ tip.length = 0.01,

+ hide.ns = TRUE,

+ bracket.size = 0.45,

+ size = 3.5,

+ color = "black"

+ )

+ }

+ p

+ }

>

> p1 <- make_plot(df, "CaDig", "Ca/Dig (mean ± SD)", sig_cadig)

Warning message:

Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.

ℹ Please use `linewidth` instead.

This warning is displayed once every 8 hours.

Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

> p2 <- make_plot(df, "Tubule","Tubule diameter (µm; mean ± SD)", sig_tub)

>

> panel <- p1 / p2 + plot_annotation(

+ title = "Histopathology indices across treatments (mean ± SD)\nAnnotations: Dunn (Bonferroni) vs Control",

+ theme = theme(plot.title = element_text(face="bold", size=13))

+ )

>

> # ---- Show and export ----

> print(panel)

> ggsave("histopathology_mean_sd_annotated_singleShape.png", panel, width = 9.5, height = 8, dpi = 300)

> ggsave("histopathology_mean_sd_annotated_singleShape.tiff", panel, width = 9.5, height = 8, dpi = 300, compression = "lzw")

> ggsave("histopathology_mean_sd_annotated_singleShape.pdf", panel, width = 9.5, height = 8, device = cairo_pdf)

>

> # ---- Export tables ----

> readr::write_csv(norm_tbl, "normality_shapiro_by_group.csv")

> readr::write_csv(summary_tbl, "summary_mean_sd_median_IQR.csv")

> readr::write_csv(dunn_cadig, "dunn_vs_control_CaDig_bonferroni.csv")

> readr::write_csv(dunn_tub, "dunn_vs_control_Tubule_bonferroni.csv")

> readr::write_csv(cd_cadig, "cliffs_delta_vs_control_CaDig.csv")

> readr::write_csv(cd_tub, "cliffs_delta_vs_control_Tubule.csv")

> capture.output(kw_cadig, file="kw_CaDig.txt")

> capture.output(kw_tub, file="kw_Tubule.txt")

> capture.output(eff_kw_cadig, file="kw_effsize_eta2H_CaDig.txt")

> capture.output(eff_kw_tub, file="kw_effsize_eta2H_Tubule.txt")

>

> cat("\nDone ✅\nAccessible figures generated with:\n",

+ "- One color per treatment (cividis) & same symbol for all raw points.\n",

+ "- Mean ± SD in black, clearly distinguished from points.\n")


Done ✅

Accessible figures generated with:

- One color per treatment (cividis) & same symbol for all raw points.

- Mean ± SD in black, clearly distinguished from points.